use "master.dta", clear

* Starting point: the merged dataset.
* This do-file cleans and prepares the outcomes and covariates and defines
* the group (sample) selection.

*************************
** 1. FAMILY VARIABLES   **
*************************

set matsize 10000
* Declare the data as a family-by-year panel
destring familie_id, replace
sort familie_id aar
xtset familie_id aar

* treat_aar is the year in which the IO period expires. Reduce it to a single
* value per family (the smallest year observed for that family) and drop
* families for which no such year exists.
rename treat_aar treat_aar1
tabulate treat_aar1
bysort familie_id: egen treat_aar = min(treat_aar1)
tabulate treat_aar
drop treat_aar1
drop if missing(treat_aar)

* Event time: calendar year relative to the treatment year (0 = treatment year)
capture drop t_count
generate t_count = aar - treat_aar

* Drop whole families whose panel maximum of a flag equals 1:
*   selvst            - self-employed
*   ikkeskattepligtig - not tax liable
* The family-level maximum is kept as <flag>_panel.
foreach flag in selvst ikkeskattepligtig {
	capture drop temp
	generate temp = `flag'
	capture drop `flag'_panel
	bysort familie_id: egen `flag'_panel = max(temp)
	tabulate `flag'_panel
	drop if `flag'_panel == 1
}

* Drop families with a property transaction (koebt, solgt or ejerskift equal
* to 1) in event years {-1,0,1}
replace koebt = 0 if koebt == .
capture drop temp
generate temp = (koebt == 1 | solgt == 1 | ejerskift == 1) & inrange(t_count, -1, 1)
tabulate temp
capture drop koebere_frasort
bysort familie_id: egen koebere_frasort = max(temp)
tabulate koebere_frasort
drop if koebere_frasort == 1
			
* Calendar month of ym_afstart, plus a copy of the date shown as YYYYMM.
* NOTE(review): month() and the %td format assume ym_afstart is a daily date - confirm.
generate m_IOstart = month(ym_afstart)
generate ym_IOstart = ym_afstart
format %tdCCYYNN ym_IOstart

tabulate m_IOstart treat_aar
* Flag rows whose start month is 1, 2 or 3 (a missing month gives 0)
generate temp_IOstart = inrange(m_IOstart, 1, 3)
* From here on a missing start month is coded as 0
replace m_IOstart = 0 if m_IOstart == .
tabulate temp_IOstart
* Family-level maxima of the flag and of the start month
bysort familie_id: egen IOstart_frasort = max(temp_IOstart)
bysort familie_id: egen IOstart_panel = max(m_IOstart)

**********************************
** 2 COVARIATES					**
**********************************
*Family structure
* numkids: famantboernf top-coded at 3; missing when famantboernf is missing
* (or takes a value other than 0, 1, 2 or >= 3).
cap drop numkids
g numkids = 0 if  famantboernf == 0
	replace numkids = 1 if  famantboernf == 1
	replace numkids = 2 if  famantboernf == 2
	replace numkids = 3 if  famantboernf >= 3 & !missing(famantboernf)

* kids: 1 when famantboernf is non-zero, 0 when it is zero.
* Stata treats missing as different from 0, so the previous
* "replace kids = 1 if famantboernf != 0" coded families with an unknown
* number of children as having children. kids is now missing in that case,
* in line with numkids.
cap drop kids
g kids = (famantboernf != 0) if !missing(famantboernf)

* Family-level number of children measured at event time t_count == -2.
* NOTE: the variable is called numkids_m1 although it is measured at -2; the
* name is kept so that code using it keeps working. Both the created name and
* the legacy numkids_t2 are dropped first so the section can be re-run.
cap drop temp
g temp = numkids if t_count == -2
cap drop numkids_t2
cap drop numkids_m1
bys familie_id: egen numkids_m1 = max(temp)

* Numeric civil-status code built from the letter in civst_1:
*   U=0, G=1, F=2, P=3, E=4, L=5, O=6; any other value stays missing
capture drop civilstatus
generate civilstatus = .
local code = 0
foreach letter in U G F P E L O {
	replace civilstatus = `code' if civst_1 == "`letter'"
	local ++code
}
tabulate civilstatus

* married: 1 when civst_1 is "G", 0 otherwise
generate married = (civst_1 == "G")
* Family-level value of married at event time t_count == -2
capture drop temp
generate temp = married if t_count == -2
capture drop married_t2
bysort familie_id: egen married_t2 = max(temp)

* dti: famqpassivn relative to famperindkialt_13 (missing when the
* denominator is zero or missing)
cap drop dti
g dti = famqpassivn/famperindkialt_13

* bidragsrate: fambidrag relative to the AVERAGE of this row's and the
* family's previous row's famrestobl. The first row of each family is missing
* because it has no previous row.
* Fixed operator precedence: the former "fambidrag/(a+b)/2" evaluated as
* fambidrag/(2*(a+b)), i.e. a quarter of the intended rate.
cap drop bidragsrate
by familie_id (aar), sort: gen bidragsrate = fambidrag / ((famrestobl + famrestobl[_n-1]) / 2)

* male: 1 when koen_1 equals 1, 0 otherwise (including missing koen_1)
generate male = koen_1 == 1
tabulate male

* liq_assets: sum of fambankakt, famoblakt and famkursakt
capture drop liq_assets
generate liq_assets = fambankakt + famoblakt + famkursakt

* assets: the same three components plus famboligvaerdi, where a missing
* famboligvaerdi is first set to 0
replace famboligvaerdi = 0 if famboligvaerdi == .
capture drop assets
generate assets = fambankakt + famoblakt + famkursakt + famboligvaerdi

* liabilities: fambankgaeld plus famrestobl, where a missing famrestobl is
* first set to 0
replace famrestobl = 0 if famrestobl == .
capture drop liabilities
generate liabilities = fambankgaeld + famrestobl

* ydelse: sum of famafdrag_for, famrente_bet_nu and fambidrag
capture drop ydelse
generate ydelse = famafdrag_for + famrente_bet_nu + fambidrag


*************************
** 3. LOAN VARIABLES   **
*************************

* English names for the new-loan indicators
rename nytlaan                  newloan
rename nytlaan_var_uden_afdrag  newloan_AR_IO   /* adjustable rate, interest only */
rename nytlaan_fast_uden_afdrag newloan_FR_IO   /* fixed rate, interest only */
rename nytlaan_var_med_afdrag   newloan_AR_M    /* adjustable rate, repaying the mortgage */
rename nytlaan_fast_med_afdrag  newloan_FR_M    /* fixed rate, repaying the mortgage */

* A missing indicator is treated as 0
foreach v in newloan newloan_AR_IO newloan_FR_IO newloan_AR_M newloan_FR_M overlev_lastyear {
	recode `v' . = 0
}

* New loan with / without an interest-only period
generate newloan_io   = (newloan_AR_IO == 1 | newloan_FR_IO == 1)
generate newloan_noio = (newloan_AR_M == 1 | newloan_FR_M == 1)

* The four combinations of newloan (0/1) and overlev_lastyear (0/1)
generate noloan         = (newloan == 0 & overlev_lastyear == 0)
generate both_new_old   = (newloan == 1 & overlev_lastyear == 1)
generate only_newloan   = (newloan == 1 & overlev_lastyear == 0)
generate only_loanstays = (newloan == 0 & overlev_lastyear == 1)

* NOTE(review): both flags look at the physically adjacent row, so they rely on
* the data being sorted by familie_id and aar at this point, and they equal
* "last/next year" only if a family's rows have no gaps in aar - confirm.

* new_lastyear: the preceding row belongs to the same family and has newloan == 1
capture drop new_lastyear
generate new_lastyear = (newloan[_n-1] == 1 & familie_id == familie_id[_n-1])
tabulate new_lastyear

* new_nextyear: the following row belongs to the same family and has newloan == 1
capture drop new_nextyear
generate new_nextyear = (newloan[_n+1] == 1 & familie_id == familie_id[_n+1])
tabulate new_nextyear

****

* lenghtio: family-level mean of l_afd, truncated towards zero to an integer.
* The variable is referred to by its full name: the former
* "replace lenght=int(lenght)" only worked through variable-name abbreviation,
* which fails under "set varabbrev off" or as soon as another variable starts
* with "lenght".
egen lenghtio = mean(l_afd), by(familie_id)
replace lenghtio = int(lenghtio)



** Outcome classification. Every out_* flag can only be 1 on the row where
** aar equals treat_aar; loan uptake is looked up on that row and on the
** adjacent rows before and after it.

* Loan stays after 9 years and no new loan: the old loan is still there
* (overlev_lastyear == 1) and there is no new loan on this or the adjacent rows.
* (Author's note: out_loan_stays takes value 1 in year t+10.)
capture drop out_loan_stays
generate out_loan_stays = (overlev_lastyear == 1 & newloan == 0 ///
	& new_lastyear == 0 & new_nextyear == 0 & treat_aar == aar)
tabulate out_loan_stays if treat_aar == aar

* Uptake of a new IO loan after 9 years: newloan_io == 1 on this row or on an
* adjacent row of the same family
capture drop out_newloan_io
generate out_newloan_io = (treat_aar == aar) & (newloan_io == 1 ///
	| (newloan_io[_n+1] == 1 & familie_id == familie_id[_n+1]) ///
	| (newloan_io[_n-1] == 1 & familie_id == familie_id[_n-1]))
tabulate out_newloan_io if treat_aar == aar

* Uptake of a new loan (but no IO loan)
capture drop out_newloan_noio
generate out_newloan_noio = (newloan + new_lastyear + new_nextyear >= 1) ///
	& out_newloan_io != 1 & treat_aar == aar
tabulate out_newloan_noio if treat_aar == aar

* Pays out the loan after 9 years: none of the three outcomes above applies.
* Possible explanations: pay out loan, kontantlån, new loan earlier/later, ...
capture drop out_paysout
generate out_paysout = (out_loan_stays + out_newloan_io + out_newloan_noio == 0) ///
	& treat_aar == aar
tabulate out_paysout if treat_aar == aar

* Number of rows with more than one outcome flag set
count if out_newloan_io + out_newloan_noio + out_loan_stays + out_paysout > 1

* Share and count of each outcome by treatment year
tabstat out_newloan_io out_newloan_noio out_loan_stays out_paysout ///
	if treat_aar == aar, by(treat_aar) statistics(mean n)

* out_new_lastyear: on the treatment-year row, the preceding row of the same
* family has newloan == 1
capture drop out_new_lastyear
generate out_new_lastyear = (newloan[_n-1] == 1 & familie_id == familie_id[_n-1] & treat_aar == aar)
tabulate out_new_lastyear

* out_new_nextyear: on the treatment-year row, the following row of the same
* family has newloan == 1
capture drop out_new_nextyear
generate out_new_nextyear = (newloan[_n+1] == 1 & familie_id == familie_id[_n+1] & treat_aar == aar)
tabulate out_new_nextyear

* Cross-tabulate the two flags in the treatment year, overall and for noloan == 1
tabulate out_new_lastyear out_new_nextyear if treat_aar == aar
tabulate out_new_lastyear out_new_nextyear if treat_aar == aar & noloan == 1

* Family-level pays-out flag, ignoring rows with aar == 2017
capture drop temp
generate temp = out_paysout if aar != 2017
capture drop paysout_panel
bysort familie_id: egen paysout_panel = max(temp)

* out_sample: the next row belongs to a different family (or does not exist)
* and this row is not from 2017. Must be computed before the data are
* re-sorted by out_paysout below.
capture drop out_sample
generate out_sample = (familie_id != familie_id[_n+1] & aar != 2017)
tabulate out_sample if out_paysout == 1

* Debt and home value in the treatment year, by pays-out status
* (this re-sorts the data by out_paysout)
bysort out_paysout: summarize famrestobl fambankgaeld famboligvaerdi if treat_aar == aar

* Home value around the treatment year for pays-out families
forvalues k = -1/1 {
	summarize famboligvaerdi if paysout_panel == 1 & t_count == `k'
}
* Property transactions in event years {-1,0,1} for pays-out families
summarize solgt koebt ejerskift if inlist(t_count, -1, 0, 1) & paysout_panel == 1


*************************
** 4. OUTCOME VARIABLES   **
*************************


**Changes in dependent variables
* Each diff* variable is the current value minus the value on the family's
* previous row (rows ordered by aar within familie_id). The first row of every
* family is missing because it has no previous row.
by familie_id (aar), sort: gen difffinaktiver = (fam_finaktiver - fam_finaktiver[_n-1])
by familie_id (aar), sort: gen diffindstpi = (famindestpi - famindestpi[_n-1])
by familie_id (aar), sort: gen diffbankgaeld = (fambankgaeld - fambankgaeld[_n-1])
by familie_id (aar), sort: gen diffconsump1a = (famforbrug1a - famforbrug1a[_n-1])
by familie_id (aar), sort: gen diffconsump3 = (famforbrug3 - famforbrug3[_n-1])
by familie_id (aar), sort: gen diffrestobl = (famrestobl - famrestobl[_n-1])
* Fixed copy-paste error: the lag subtracted here used to be famrestobl[_n-1],
* which made diffrestknt a mix of two different variables.
by familie_id (aar), sort: gen diffrestknt = (famrestknt - famrestknt[_n-1])
by familie_id (aar), sort: gen difffamafdrag_for = (famafdrag_for - famafdrag_for[_n-1])
by familie_id (aar), sort: gen diffliq_assets = (liq_assets - liq_assets[_n-1])
by familie_id (aar), sort: gen diffydelse = (ydelse - ydelse[_n-1])


*Normalizing by past income: every r* variable is its source variable divided
*by famdispon_13 on the family's previous row (rows ordered by aar), so the
*first row of each family is missing.
* NOTE(review): diffconsump1a_ales and diffconsump3_ales are not created in
* this file; they are presumably already present in master.dta - confirm.

* Changes and payments; the new variable is "r" + the source name
foreach src in difffinaktiver diffindstpi diffbankgaeld diffconsump1a ///
		diffconsump3 diffrestobl diffrestknt famafdrag_for famrente_bet_nu ///
		diffconsump1a_ales diffconsump3_ales diffliq_assets {
	by familie_id (aar), sort: generate r`src' = `src' / famdispon_13[_n-1]
}

* Two ratios whose names do not follow the "r" + source pattern
by familie_id (aar), sort: generate rdydelse = diffydelse / famdispon_13[_n-1]
by familie_id (aar), sort: generate rpensionsbidrag = fampensindbetal / famdispon_13[_n-1]

* Levels; again "r" + the source name
foreach src in famboligvaerdi famforbrug1a famforbrug3 famrestobl ///
		fambankgaeld liq_assets famrentudgpr {
	by familie_id (aar), sort: generate r`src' = `src' / famdispon_13[_n-1]
}


*Censoring variables: top and bottom 1%
* For every listed variable v a copy v_cens is created in which, separately
* for each year 1998-2017, values below the 1st percentile are raised to it
* and values above the 99th percentile are lowered to it. Missing values stay
* missing.
* The list used to name rdiffindstpi, rdiffconsump1a_ales, famdispon_13 and
* rfamboligvaerdi twice, so each was dropped and recomputed a second time.
* Only the last occurrence of each is kept, which leaves the values and the
* position of every *_cens variable in the dataset unchanged.
local varlist 	fam_finaktiver difffinaktiver famindestpi diffindstpi fambankgaeld famrestobl diffbankgaeld bidragsrate ///
				diffrestobl famforbrug1a diffconsump1a famforbrug3 diffconsump3 diffconsump1a_ales ///
				rdifffinaktiver rdiffliq_assets rdiffindstpi rdiffbankgaeld famafdrag_for difffamafdrag_for rdiffrestobl rdiffrestknt ///
				rdiffconsump1a rdiffconsump3 rdiffconsump1a_ales rdiffconsump3_ales famperindkialt_13 famloenmv_13 dti ///
				fampensindbetal rpensionsbidrag famdispon_13 rfamafdrag_for rfamrente_bet_nu rfamboligvaerdi rfamrestobl rfamforbrug1a rfamforbrug3 rfambankgaeld rliq_assets ///
				rfamrentudgpr

foreach v in `varlist' {
	cap drop `v'_cens
	g `v'_cens=`v'
	forval t=1998/2017 {
		* Percentiles are taken from the uncensored variable within year t;
		* r(p1) and r(p99) stay available until the next r-class command
		qui sum `v' if aar==`t',d
		replace `v'_cens=r(p1) if `v'_cens<r(p1) & `v'_cens!=. & aar==`t'
		replace `v'_cens=r(p99) if `v'_cens>r(p99) & `v'_cens!=. & aar==`t'
	}
}
* Values of fammax above 200 are set to missing
replace fammax=. if fammax>200

* Write the prepared dataset. "clear" is not an option of -save- (the command
* stopped with "option clear not allowed" and nothing was written); "replace"
* overwrites an existing analysis.dta.
save "analysis.dta", replace

